PS5

Author

Daniel Rubio

PS5

Daniel Rubio, 11/21/24

As an aid to my problem solving, I used UM GPT to probe for explanations when I was confused, and for suggestions of possible built in functions to use. I did not copy and paste the prompts into the AI, but rather used it to answer the questions I had as they arose.

Problem 1: Object Oriented Programming

(a) Defining “rational” class

Defining requested Rcpp functions:

library(Rcpp)

#' Function that calls down to cpp and finds GCD
#'
#' @param A int
#' @param B int
#'
#' @return Greatest common denominator
#'
cGCD <- function(A,B) {
  ans <- evalCpp(paste0("std::gcd(",A, ", ", B, ")"))
  return(ans)
}
#' Function that calls down to cpp and finds LCM
#'
#' @param A int
#' @param B int
#'
#' @return Greatest common multiple
#'
cLCM <- function(A,B) {
  ans <- evalCpp(paste0("std::lcm(",A, ", ", B, ")"))
  return(ans)
}

Defining class:

rational <- setClass("rational",
         slots = c(data = "numeric"))

setValidity('rational',function(object){
  # object must be in form c(numerator, denominator)
  if(length(object@data) != 2){
    stop("Input must be vector of length two")
  }
  if(object@data[1]!= round(object@data[1]) | object@data[2]!= round(object@data[2] )){
    stop("Numerator and denominator must be integers")
  }
  if(object@data[2] == 0){
    stop("Denominator must be non-zero")
  }
  return(TRUE)
})
Class "rational" [in ".GlobalEnv"]

Slots:
              
Name:     data
Class: numeric

Methods:

setMethod("show", "rational",
  function(object) {
    cat(object@data[1], "/", object@data[2])
    cat("\n")
    return(invisible(object))
  }
)

setGeneric("simplify",
           function(object) {
             standardGeneric("simplify")})
[1] "simplify"
setMethod("simplify", "rational",
  function(object) {
    num <- object@data[1]
    den <- object@data[2]
    com_denom <- cGCD(num,den)
    new_num <- num / com_denom
    new_den <- den / com_denom
    simplified <- rational(data = c(new_num,new_den))
    return(simplified)
  }
)
setGeneric("quotient",
           function(object,...) {
             standardGeneric("quotient")})
[1] "quotient"
setMethod("quotient", "rational",
  function(object, digits = 8) {
    num <- object@data[1]
    den <- object@data[2]
    result <- num/den
    if(round(result,digits) == round(result,digits+1)){
      rest = ""
    }
    else{rest="..."}
    cat(round(result, digits),rest, sep="")
    cat("\n")
    return(invisible(result))
  }
)

Operations:

##' @title `rational` addition
##'
##'
##' @param e1 A `rational`
##' @param e2 A `rational`
##' @return A `rational` that is the sum of e1 and e2
setMethod("+", signature(e1 = "rational", e2 = "rational"),
            function(e1, e2) {
            n1 <- e1@data[1]
            d1 <- e1@data[2]
            n2 <- e2@data[1]
            d2 <- e2@data[2]
            N = n1*d2+n2*d1
            D = d1*d2
            return(simplify(rational(data=c(N,D))))
            }
          )
##' @title `rational` subtraction
##'
##'
##' @param e1 A `rational`
##' @param e2 A `rational`
##' @return A `rational` that is the difference of e1 and e2
setMethod("-", signature(e1 = "rational", e2 = "rational"),
            function(e1, e2) {
            n1 <- e1@data[1]
            d1 <- e1@data[2]
            n2 <- e2@data[1]
            d2 <- e2@data[2]
            N = n1*d2-n2*d1
            D = d1*d2
            return(simplify(rational(data=c(N,D))))
            }
          )
##' @title `rational` multiplication
##'
##'
##' @param e1 A `rational`
##' @param e2 A `rational`
##' @return A `rational` that is the multiplication of e1 and e2
setMethod("*", signature(e1 = "rational", e2 = "rational"),
            function(e1, e2) {
            n1 <- e1@data[1]
            d1 <- e1@data[2]
            n2 <- e2@data[1]
            d2 <- e2@data[2]
            N = n1*n2
            D = d1*d2
            return(simplify(rational(data=c(N,D))))
            }
          )
##' @title `rational` division
##'
##'
##' @param e1 A `rational`
##' @param e2 A `rational`
##' @return A `rational` that is e1 divided by e2
setMethod("/", signature(e1 = "rational", e2 = "rational"),
            function(e1, e2) {
            n1 <- e1@data[1]
            d1 <- e1@data[2]
            n2 <- e2@data[1]
            d2 <- e2@data[2]
            N = n1*d2
            D = d1*n2
            return(simplify(rational(data=c(N,D))))
            }
          )

(b) Using “rational” class

r1 = rational(data = c(24,6))
r2 = rational(data = c(7,230))
r3 = rational(data = c(0,4))
r1
24 / 6
r3
0 / 4
r1 + r2
927 / 230
r1 - r2
913 / 230
r1 * r2
14 / 115
r1 / r2
920 / 7
r1 + r3
4 / 1
r1 * r3
0 / 1
r2 / r3
Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'simplify': Denominator must be non-zero
quotient(r1)
4
quotient(r2)
0.03043478...
quotient(r2, digits = 3)
0.03...
quotient(r2, digits = 3.14)
0.03...
quotient(r2, digits = "avocado")
Error in round(result, digits): non-numeric argument to mathematical function
q2 <- quotient(r2, digits = 3)
0.03...
q2
[1] 0.03043478
quotient(r3)
0
simplify(r1)
4 / 1
simplify(r2)
7 / 230
simplify(r3)
0 / 1

(c) Not creating impossible rationals, other bad inputs

r4 = rational(data = c(1,0))
Error in validityMethod(object): Denominator must be non-zero
r5 = rational(data = c(1,2,3))
Error in validityMethod(object): Input must be vector of length two
r6 = rational(data = c(1))
Error in validityMethod(object): Input must be vector of length two
r7 = rational(data = c("numerator","denominator"))
Error in validObject(.Object): invalid class "rational" object: invalid object for slot "data" in class "rational": got class "character", should be or extend class "numeric"
r8 = rational(data = c(1.2,4))
Error in validityMethod(object): Numerator and denominator must be integers
r8 = rational(data = c(1,4.3))
Error in validityMethod(object): Numerator and denominator must be integers
r9 = rational(data = 2/9)
Error in validityMethod(object): Input must be vector of length two
rm(list=ls())

Problem 2: plotly

(a)

Load in data

data <- read.csv("df_for_ml_improved_new_market.csv")
# install.packages("tidyverse") ran once
# install.packages("plotly") ran once
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

Regenerate plot

genres = c("Photography","Print","Sculpture","Painting","Others")

data_long <- data %>% 
  pivot_longer(cols=starts_with("Genre___"), names_to = "Genre",values_to = "value") %>%
  filter(value==1) %>% 
  select(-value) %>% 
  mutate(
    Genre = fct_recode(
      Genre,
      "Others" = "Genre___Others",
      "Photography" = "Genre___Photography",
      "Print" = "Genre___Print",
      "Sculpture" = "Genre___Sculpture",
      "Painting" = "Genre___Painting"
    )
  )

g <- ggplot(data_long, aes(x=year,group=year, y=price_usd ) ) +
       geom_boxplot() +
  ggtitle("Artwork price Statistics by Genre 1997-2012")+
  scale_y_log10(name = "Price (USD)") +
  scale_x_continuous(breaks=seq(1997,2012, by=4), name="Year")+
  facet_wrap("Genre")
g

(b)

Creating Interactive plot with plotly

p <- plot_ly(data=data_long) |>
  add_boxplot(x = ~ year, y = ~ price_usd) |>
  add_boxplot(data=data_long[data_long$Genre=="Photography",],x = ~ year, y = ~ price_usd,visible=FALSE) |>
  add_boxplot(data=data_long[data_long$Genre=="Print",],x = ~ year, y = ~ price_usd,visible=FALSE) |>
  add_boxplot(data=data_long[data_long$Genre=="Sculpture",],x = ~ year, y = ~ price_usd,visible=FALSE) |>
  add_boxplot(data=data_long[data_long$Genre=="Painting",],x = ~ year, y = ~ price_usd,visible=FALSE) |>
  add_boxplot(data=data_long[data_long$Genre=="Others",],x = ~ year, y = ~ price_usd,visible=FALSE) |>
  layout(updatemenus = list(
  list(y = 1,
    buttons = list(
      list(method = "update",
           args = list(list(visible =  list(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)),
                       list(yaxis = list(title = "Price (USD)",type="log"))),
           label = "Over Time, Overall"),
      
      list(method = "update",
           args = list(list(visible =  list(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE)),
                       list(yaxis = list(title = "Price (USD)",type="log"))),
           label = "Photography"),
      list(method = "update",
           args = list(list(visible =  list(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE)),
                       list(yaxis = list(title = "Price (USD)",type="log"))),
           label = "Print"),
      list(method = "update",
           args = list(list(visible =  list(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE)),
                       list(yaxis = list(title = "Price (USD)",type="log"))),
           label = "Sculpture"),
      list(method = "update",
           args = list(list(visible =  list(FALSE, FALSE, FALSE, FALSE, TRUE, FALSE)),
                       list(yaxis = list(title = "Price (USD)",type="log"))),
           label = "Painting"),

      list(method = "update",
           args = list(list(visible =  list(FALSE, FALSE, FALSE, FALSE, FALSE, TRUE)),
                       list(yaxis = list(title = "Price (USD)",type="log"))),
           label = "Others"))
  )
)) |>
  layout(yaxis = list(title = "Price (USD)",type = "log"), 
         xaxis = list(title = "Year"),
         title = "Artwork price Statistics by Genre 1997-2012") 

p
rm(list=ls())

Problem 3: PS4-1 data.table

Setting up problem with data.table

#install.packages("nycflights13") # Ran once
# install.packages("tidyverse") # Ran once
library(nycflights13)
library(data.table)

Attaching package: 'data.table'
The following objects are masked from 'package:lubridate':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year
The following objects are masked from 'package:dplyr':

    between, first, last
The following object is masked from 'package:purrr':

    transpose
flights <- as.data.table(flights)
airports <- as.data.table(airports)
planes <- as.data.table(planes)

(a)

flights[ , 
         .(mean_delay = mean(dep_delay, na.rm = TRUE),
            median_delay = median(dep_delay,na.rm = TRUE))
         ,by=origin] |> # First dt call calculates mean  and median delay by origin
  merge(x=_, airports,
        by.x = "origin",
        by.y = "faa")|> # merge with airports table to get name joining on  origin/faa code
  _[ , .(name, median_delay, mean_delay)] # print out name and stats
                  name median_delay mean_delay
                <char>        <num>      <num>
1: Newark Liberty Intl           -1   15.10795
2: John F Kennedy Intl           -1   12.11216
3:          La Guardia           -3   10.34688
flights[, .N , by = dest] |> # gets how many rows of each dest there are 
  _[N>=10] |>  # filters all the rows with less than 10 flights
  merge(x = _, y = flights)|> # merges these rows applying the filter to the main data
  _[ , .(mean_delay = mean(arr_delay, na.rm = TRUE),
        median_delay = median(arr_delay,na.rm = TRUE))
    , by = dest] |> # call calculates mean and median arrival by dest on filtered dests
  merge(x=_, airports,
        by.x = "dest",
        by.y = "faa")|># merge with airports table to get name joining on  dest/faa code
  _[ order(-mean_delay), .(name, median_delay, mean_delay)] # print out name and stats
                                    name median_delay   mean_delay
                                  <char>        <num>        <num>
 1:                Columbia Metropolitan         28.0  41.76415094
 2:                           Tulsa Intl         14.0  33.65986395
 3:                    Will Rogers World         16.0  30.61904762
 4:                 Jackson Hole Airport         15.0  28.09523810
 5:                        Mc Ghee Tyson          2.0  24.06920415
 6:               Dane Co Rgnl Truax Fld          1.0  20.19604317
 7:                        Richmond Intl          1.0  20.11125320
 8:        Akron Canton Regional Airport          3.0  19.69833729
 9:                      Des Moines Intl          0.0  19.00573614
10:                   Gerald R Ford Intl          1.0  18.18956044
11:                      Birmingham Intl         -2.0  16.87732342
12:         Theodore Francis Green State          1.0  16.23463687
13: Greenville-Spartanburg International         -0.5  15.93544304
14:    Cincinnati Northern Kentucky Intl         -3.0  15.36456376
15:            Savannah Hilton Head Intl         -1.0  15.12950601
16:          Manchester Regional Airport         -3.0  14.78755365
17:                          Eppley Afld         -2.0  14.69889841
18:                               Yeager         -1.5  14.67164179
19:                     Kansas City Intl          0.0  14.51405836
20:                          Albany Intl         -4.0  14.39712919
21:                General Mitchell Intl          0.0  14.16722038
22:                       Piedmont Triad         -2.0  14.11260054
23:               Washington Dulles Intl         -3.0  13.86420212
24:               Cherry Capital Airport        -10.0  12.96842105
25:              James M Cox Dayton Intl         -3.0  12.68048606
26:     Louisville International Airport         -2.0  12.66938406
27:                  Chicago Midway Intl         -1.0  12.36422360
28:                      Sacramento Intl          4.0  12.10992908
29:                    Jacksonville Intl         -2.0  11.84483416
30:                       Nashville Intl         -2.0  11.81245891
31:                Portland Intl Jetport         -4.0  11.66040210
32:               Greater Rochester Intl         -5.0  11.56064461
33:      Hartsfield Jackson Atlanta Intl         -1.0  11.30011285
34:                Lambert St Louis Intl         -3.0  11.07846451
35:                         Norfolk Intl         -4.0  10.94909344
36:            Baltimore Washington Intl         -5.0  10.72673385
37:                         Memphis Intl         -2.5  10.64531435
38:                   Port Columbus Intl         -3.0  10.60132291
39:                  Charleston Afb Intl         -4.0  10.59296847
40:                    Philadelphia Intl         -3.0  10.12719014
41:                  Raleigh Durham Intl         -3.0  10.05238095
42:                    Indianapolis Intl         -3.0   9.94043412
43:            Charlottesville-Albemarle         -5.0   9.50000000
44:               Cleveland Hopkins Intl         -5.0   9.18161129
45:        Ronald Reagan Washington Natl         -2.0   9.06695204
46:                      Burlington Intl         -4.0   8.95099602
47:                 Buffalo Niagara Intl         -5.0   8.94595186
48:                Syracuse Hancock Intl         -5.0   8.90392501
49:                          Denver Intl         -2.0   8.60650021
50:                      Palm Beach Intl         -3.0   8.56297210
51:                             Bob Hope         -3.0   8.17567568
52:       Fort Lauderdale Hollywood Intl         -3.0   8.08212154
53:                          Bangor Intl         -9.0   8.02793296
54:           Asheville Regional Airport         -1.0   8.00383142
55:                      Pittsburgh Intl         -5.0   7.68099053
56:                       Gallatin Field         -2.0   7.60000000
57:                 NW Arkansas Regional         -2.0   7.46572581
58:                           Tampa Intl         -4.0   7.40852503
59:               Charlotte Douglas Intl         -3.0   7.36031885
60:             Minneapolis St Paul Intl         -5.0   7.27016886
61:                      William P Hobby         -4.0   7.17618819
62:                         Bradley Intl        -10.0   7.04854369
63:                     San Antonio Intl         -9.0   6.94537178
64:                      South Bend Rgnl         -3.5   6.50000000
65:     Louis Armstrong New Orleans Intl         -6.0   6.49017497
66:                        Key West Intl          7.0   6.35294118
67:                        Eagle Co Rgnl         -4.0   6.30434783
68:                Austin Bergstrom Intl         -5.0   6.01990875
69:                   Chicago Ohare Intl         -8.0   5.87661475
70:                         Orlando Intl         -5.0   5.45464309
71:               Detroit Metro Wayne Co         -7.0   5.42996346
72:                        Portland Intl         -5.0   5.14157973
73:                        Nantucket Mem         -3.0   4.85227273
74:                      Wilmington Intl         -7.0   4.63551402
75:                    Myrtle Beach Intl        -13.0   4.60344828
76:    Albuquerque International Sunport         -5.5   4.38188976
77:         George Bush Intercontinental         -5.0   4.24079040
78:        Norman Y Mineta San Jose Intl         -7.0   3.44817073
79:               Southwest Florida Intl         -5.0   3.23814963
80:                       San Diego Intl         -5.0   3.13916574
81:              Sarasota Bradenton Intl         -5.0   3.08243131
82:            Metropolitan Oakland Intl         -9.0   3.07766990
83:   General Edward Lawrence Logan Intl         -9.0   2.91439222
84:                   San Francisco Intl         -8.0   2.67289152
85:                         Yampa Valley          2.0   2.14285714
86:              Phoenix Sky Harbor Intl         -6.0   2.09704733
87:            Montrose Regional Airport        -10.5   1.78571429
88:                     Los Angeles Intl         -7.0   0.54711094
89:               Dallas Fort Worth Intl         -9.0   0.32212685
90:                           Miami Intl         -9.0   0.29905978
91:                       Mc Carran Intl         -8.0   0.25772849
92:                  Salt Lake City Intl         -8.0   0.17625459
93:                           Long Beach        -10.0  -0.06202723
94:                Martha\\\\'s Vineyard        -11.0  -0.28571429
95:                  Seattle Tacoma Intl        -11.0  -1.09909910
96:                        Honolulu Intl         -7.0  -1.36519258
97:            John Wayne Arpt Orange Co        -11.0  -7.86822660
98:                    Palm Springs Intl        -13.5 -12.72222222
                                    name median_delay   mean_delay

(b)

merge(flights, planes, by = "tailnum") |> # merge tables to get model name
  _[ , .(avg_speed_mph = mean(distance/(air_time/60), na.rm=TRUE), .N),by = model]|> # calculates averae speed in mph for each model alongside number of flights flown 
  _[avg_speed_mph == max(avg_speed_mph)] # returns only the fastest model
     model avg_speed_mph     N
    <char>         <num> <int>
1: 777-222      482.6254     4
rm(list=ls())